## Understanding Public Attitudes toward Restrictive Voting Laws
## Katie Clayton  
## Step 3: Calculate sample demographics and balance
## Last updated: June 8, 2023


# Initial settings --------------------------------------------------------

# Attach the packages used below. The workspace is deliberately not cleared
# with rm(list = ls()): that call deletes whatever objects the caller has in
# the global environment, and every object this script uses is assigned
# before it is read, so a clean workspace is not required.
library(tidyverse)
library(stargazer)


# Read data ---------------------------------------------------------------

# Load the cleaned dataset and reduce it to the demographic columns that the
# tables below report, one transformation per statement.
df <- read.csv("output/cleaned_dataset.csv")

# Give the suffixed columns plain names.
df <- rename(df,
             age = age...13,
             gender = gender...12,
             education = education...14)

df <- select(df,
             age, gender, education, race, pid, rep.dum,
             turnout_2020, vote_2020, group)

# Collapse turnout to two codes: values above 1 become 2, everything else 1.
# NOTE(review): because of the !is.na() guard, a missing turnout_2020 is also
# coded 1 instead of staying NA -- confirm that this is intended.
df <- mutate(df,
             turnout_2020 = ifelse(!is.na(turnout_2020) & turnout_2020 > 1,
                                   2, 1))


# Calculate group N -------------------------------------------------------


# Tabulate one variable as counts plus formatted percentages.
#
# Arguments:
#   data  - a data frame; if it is already grouped with group_by(), counts
#           and percentages are computed separately within each group.
#   col   - unquoted name of the column to tabulate.
#   label - string written to the `var` column to identify the variable.
#
# Returns the count() table for `col` with rows where `col` is NA removed,
# plus two added columns: `pct`, the share of the remaining n rounded to one
# decimal and formatted as text ending in "%", and `var`, holding `label`.
# Any grouping on `data` is kept on the result.
tabulate_pct <- function(data, col, label) {
  data %>%
    count({{ col }}) %>%
    filter(!is.na({{ col }})) %>% # Exclude NA before computing the shares
    mutate(pct = 100 * (n / sum(n)),
           pct = round(pct, 1),
           pct = format(pct, nsmall = 1),
           pct = paste0(pct, "%"),
           var = label)
}

# For every variable: one table by experimental group, and one table for the
# full sample (suffix .full).

age <- df %>% group_by(group) %>% tabulate_pct(age, "Age")
age.full <- df %>% tabulate_pct(age, "Age")

gender <- df %>% group_by(group) %>% tabulate_pct(gender, "Gender")
gender.full <- df %>% tabulate_pct(gender, "Gender")

education <- df %>% group_by(group) %>% tabulate_pct(education, "Education")
education.full <- df %>% tabulate_pct(education, "Education")

race <- df %>% group_by(group) %>% tabulate_pct(race, "Race")
race.full <- df %>% tabulate_pct(race, "Race")

party <- df %>% group_by(group) %>% tabulate_pct(rep.dum, "Party")
party.full <- df %>% tabulate_pct(rep.dum, "Party")

turnout <- df %>%
  group_by(group) %>%
  tabulate_pct(turnout_2020, "2020 turnout")
turnout.full <- df %>% tabulate_pct(turnout_2020, "2020 turnout")

vote <- df %>%
  group_by(group) %>%
  tabulate_pct(vote_2020, "2020 vote choice")
vote.full <- df %>% tabulate_pct(vote_2020, "2020 vote choice")


# Calculate balance -------------------------------------------------------

# Keep the rows of each table in `tables` whose `group` equals `arm` and
# stack them in list order.
#
# bind_rows() is called explicitly because the tables name their level column
# differently (age, gender, education, ...). Base rbind() on plain data
# frames rejects mismatched column names, so the stacking should not depend
# on which rbind method happens to be dispatched for these objects.
# bind_rows() fills the columns a table lacks with NA.
stack_arm <- function(tables, arm) {
  bind_rows(lapply(tables, function(tbl) filter(tbl, group == arm)))
}

# Fixed variable order; the row labels of the final table rely on it.
group_tables <- list(age, gender, education, race, party, turnout, vote)

control <- stack_arm(group_tables, "control")
harm <- stack_arm(group_tables, "harm")
boost <- stack_arm(group_tables, "boost")


# Combine into table ------------------------------------------------------

# Full-sample percentages, concatenated in the same variable order as the
# per-group tables (age, gender, education, race, party, turnout, vote).
full <- c(age.full$pct, gender.full$pct, education.full$pct, race.full$pct,
          party.full$pct, turnout.full$pct, vote.full$pct)


# Row labels for the table. They are matched to the percentages purely by
# position.
# NOTE(review): assumes the coded values of each variable sort into exactly
# this order in count() -- confirm against the codebook.
levels <- c("18-34", "35-44", "45-54", "55-64", "65 or older",
            "Man", "Woman", "Other",
            "Some high school or less",
            "High school degree", 
            "Some college",
            "2-year degree",
            "4-year degree",
            "Post-graduate degree",
            "White", 
            "Black or African American", 
            "American Indian or Alaska Native",
            "Asian or Pacific Islander", 
            "Hispanic or Latino",
            "Other",
            "Democrat","Republican",
            "Voted", "Did not vote",
            "Joe Biden","Donald Trump","Another candidate")

# cbind() recycles shorter vectors, so a category that is absent from one
# group would shift every later row of that column against the labels.
# Stop instead of printing a misaligned table.
column_lengths <- lengths(list(control$pct, harm$pct, boost$pct, full))
if (any(column_lengths != length(levels))) {
  stop("Percentage columns have lengths ",
       paste(column_lengths, collapse = ", "),
       " but there are ", length(levels), " row labels; ",
       "a category is probably missing in at least one group.",
       call. = FALSE)
}

combined <- cbind(levels, control$pct, harm$pct, boost$pct, full)
colnames(combined) <- c("Levels", "Control", "Harm minorities", "Boost minority turnout", "Total")

# Print the character matrix as-is (no summary statistics, no row names).
stargazer(combined, 
          summary = FALSE,
          rownames = FALSE)
